
*************************************************************************************************
/*																								
	Purpose: Append 77-18 income score files (1a,1b,1c) and then 			
				merge onto full GSS-provided dataset (1972-2018).				
	Creates: GSS_allyears_foranalysis.dta									
*/																								
*************************************************************************************************

* ---- Session setup ----
clear
set more off
set maxvar 30000

* All relative paths below (./input, ./output) resolve against this folder.
cd "$Mydirectory1/1_DataSources/GSS/"

*------------------------------------------------------------------------------*

******************
*** STACK THE THREE INCOME-SCORE FILES INTO ONE TEMPFILE (`occs')
******************

* File 1 of 3: the 10to18 file, taken as is.
tempfile scores_10to18
use ./output/GSS_10to18_analysis.dta, clear
save `scores_10to18'

* File 2 of 3: the 88to10 file, dropping survey years up to and including 1990
* (those years are taken from the 77to90 file loaded next).
tempfile scores_after1990
use ./output/GSS_88to10_analysis.dta, clear
drop if year<=1990
save `scores_after1990'

* File 3 of 3 is the base; the other two are stacked underneath it in order.
use ./output/GSS_77to90_analysis.dta, clear
append using `scores_after1990'
append using `scores_10to18'

* age is removed from the stacked scores before they are merged onto the
* full GSS file further down (presumably so that only the GSS-provided age
* is carried forward -- TODO confirm).
drop age

* `occs' is the tempfile used by the merge on year and id below.
tempfile occs
save `occs'

*------------------------------------------------------------------------------*

******** BRING IN DATA AND MERGE INCOME SCORES ********

use ./input/GSS7218_R1.dta, clear //download from GSS website

ren *, lower

* Sample restrictions: survey years 1977 onwards, respondents aged 30-50.
* (age<=50 also excludes missing ages, since missing sorts above all numbers.)
keep if year>=1977 
keep if age>=30 & age<=50

* Attach the income scores saved in `occs' by year and id.
* keep(master match): rows that exist ONLY in `occs' are not brought in.
* Without this, using-only rows would bypass the year/age restrictions above
* and enter the sample with age (and every other GSS variable) missing.
* nogenerate replaces the former "drop _merge".
sort year id
merge 1:1 year id using `occs', keep(master match) nogenerate

* Foreign-born (available from 1977 onwards)
* Note: born==2 evaluates to 0 when born is missing, so respondents with
* missing born are coded 0 and are kept by the filter below.
	gen foreignborn = born==2 if year>=1977
	label var foreignborn "Respondent not born in US"
	keep if (foreignborn==0 | foreignborn==.) 

/* Note: The 1982 and 1987 GSSs included oversamples of Black respondents. To adjust statistical 
		 results for this oversampling, one may either exclude cases in the black oversamples 
		 (codes 4, 5, and 7 on variable SAMPLE) or weight statistical results using weights in 
		 variable OVERSAMP (https://gssdataexplorer.norc.org/pages/show?page=gss%2Ffaq).
   Here the oversample cases are excluded.
*/
	tab year if inlist(sample,4,5,7)
	drop if inlist(sample,4,5,7)
	
*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

*********************
**** DEMOGRAPHICS 
*********************

* Age squared 
	gen agesq = age^2

* Respondent is the wife: female (sex==2) and currently married (marital==1)
	gen wife = (marital==1 & sex==2)
	* Check: spouse occupation among women not coded as wives
	tab spocc if sex==2 & wife==0, m
	
* Employed: work status 1 or 2; missing when work status is missing
	tab wrkstat, m
	replace wrkstat = . if wrkstat==.n
	gen employed = inlist(wrkstat,1,2) if !missing(wrkstat)
	label var employed "Employed"
	
* Spouse employed: same coding applied to spouse's work status
	tab spwrksta, m
	replace spwrksta = . if inlist(spwrksta,.i,.n)
	gen employed_spouse = inlist(spwrksta,1,2) if !missing(spwrksta)
	label var employed_spouse "Spouse is employed"

* Race: codes above 2 (and any missing code) are set to missing,
* so black is defined only for race codes 1 and 2
	replace race = . if race>2
	gen black = (race==2) if !missing(race)
	label var black "Black"
		
* Female 
	gen female = (sex==2)
	label var female "Female" 
	
* Marital-status dummies (marital: 1 married, 2 widowed, 3 divorced,
* 4 separated, 5 never married -- per the codes used below).
* Each dummy is defined only when marital is non-missing. "if marital<."
* excludes system missing AND every extended missing code (.d, .i, .n, ...),
* so all five dummies share the same missing-value rule. Previously only .n
* (and, for married, also .) was treated as missing; any other missing code
* was silently coded 0.

* Married 
	gen married = (marital==1) if marital<.
	tab married, m
	label var married "Married"
	
* Never-married 
	gen never_married = (marital==5) if marital<.
	tab never_married, m
	
* Widowed 
	drop widowed //GSS-provided variable for "ever been widowed"

	gen widowed = (marital==2) if marital<.
	tab widowed, m
	
* Divorced 
	gen divorced = (marital==3) if marital<.
	tab divorced, m 
	
* Separated 
	gen separated = (marital==4) if marital<.
	tab separated, m
	
* Parents' place of birth 
	tab parborn, m
	* =1 for parborn codes 1, 2, 4, 6, 8; =0 for the other codes below 9; missing otherwise
	gen oneparent_foreign = inlist(parborn,1,2,4,6,8) if parborn<9
	gen parentforeign = (oneparent_foreign==1) if year>=1977
	replace parentforeign = . if inlist(parborn,7,.n) & year>=1977 //"don't know for both" or no answer
	label var parentforeign "At least one parent not born in US"
	* NOTE(review): for parborn missing codes other than .n (e.g. .i, .d),
	* parentforeign ends up 0 rather than missing, whereas fatherforeign below
	* is set to missing for .i -- confirm this asymmetry is intended.
	
	* Father: 1 for parborn codes 1, 6, 8; missing for 3, 4, 7, .i, .n; 0 otherwise
	gen fatherforeign = 0
	replace fatherforeign = 1 if inlist(parborn,1,6,8)
	replace fatherforeign = . if inlist(parborn,3,4,7,.i,.n)
	label var fatherforeign "Father not born in US"
	
* Union households
	* union codes below 4 count as a union household; codes 1 and 3 mean R is a member
	gen union_hh = (union<4) if !missing(union)
	label var union_hh "Union household (R, spouse, or both)"
	
	gen unionR = inlist(union,1,3) if !missing(union)
	label var unionR "Respondent in union"

* Veteran 
	tab year memvet, m nol
	replace memvet = . if inlist(memvet,.n,.i,.d)
	gen vetgroup = (memvet==1) if !missing(memvet)
	
	tab year vetyears, m nol
	replace vetyears = . if inlist(vetyears,.n,.i)
	gen any_armedforces = (vetyears<=4 & vetyears>0) if !missing(vetyears) //some years in armed forces
	
	* veteran mirrors any_armedforces (1/0, missing when that is missing)
	gen veteran = (any_armedforces==1) if !missing(any_armedforces)
	label var veteran "Veteran"
	
*********************
*** REGION
*********************

* Collapse the 9-category region codes into 4 groups, once for region at
* interview (region) and once for region at age 16 (reg16). Codes outside
* 1-9 (including missing) are left missing.
	gen region4=.
	replace region4=1 if inlist(region,1,2)
	replace region4=2 if inlist(region,3,4)
	replace region4=3 if inlist(region,5,6,7)
	replace region4=4 if inlist(region,8,9)
	
	gen region4_childhood=.
	replace region4_childhood=1 if inlist(reg16,1,2) /*northeast*/
	replace region4_childhood=2 if inlist(reg16,3,4) /*midwest*/
	replace region4_childhood=3 if inlist(reg16,5,6,7) /*south*/
	replace region4_childhood=4 if inlist(reg16,8,9) /*west*/
	
	label define region_l 1 "NORTHEAST" 2 "MIDWEST" 3 "SOUTH" 4 "WEST"
	label values region4 region4_childhood region_l
	tab region4_childhood, m
	tab region4, m

* Moved
	tab region, m
	tab reg16, m
	
	* Defined only when both regions are observed. Without the guard, a
	* missing region or reg16 compares as "different" and would be coded
	* as having moved.
	gen moved_region = (region!=reg16) if region<. & reg16<.
	label var moved_region "Moved region b/w age16 and interview"
	
	gen moved_city = inlist(mobile16,2,3) if mobile16<4
	label var moved_city "Moved cities since age16"
	
	gen moved_state = (mobile16==3) if mobile16<4
	label var moved_state "Moved state since age16"
	
* South-related
	* NOTE(review): south and south_raised are 0 (not missing) when region /
	* reg16 is missing; left as is -- confirm whether that is intended.

	gen south = inrange(region,5,7) //south atlantic, east south central, west south central
	gen nonsouth = 1 - south
	
	gen south_raised = inrange(reg16,5,7) //south atlantic, east south central, west south central
	
	* Raised in the South and interviewed outside it
	gen migration=0
	replace migration=1 if south_raised==1 & nonsouth==1
	label var migration "Great Migration"
	
* Urban place
	* "if size<." is required: Stata treats missing as larger than any
	* number, so size>=10 alone would code missing size as urban==1.
	gen urban = (size>=10) if size<.
	label var urban "Place of interview has 10,000+ people"
	
*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

*********************
**** EDUCATION 
*********************
	
* Respondent: bin highest year of school completed (educ) into 7 ordered categories
	gen eduR = .
	replace eduR = 0 if educ==0
	replace eduR = 1 if educ>=1 & educ<8      //some grade school
	replace eduR = 2 if educ==8               //completed grade school
	replace eduR = 3 if educ>8 & educ<=11     //high school dropout
	replace eduR = 4 if educ==12              //high school degree
	replace eduR = 5 if inrange(educ,13,15)   //some college
	replace eduR = 6 if inrange(educ,16,20)   //BA (16 yrs) or more
	label var eduR "Education, binned for respondent"
	
	* Attainment dummies; missing whenever eduR is missing
	gen hs_ed = (eduR>=4) if !missing(eduR)
	gen coll_ed = (eduR>=6) if !missing(eduR)
	label var hs_ed "Respondent HS educated" 
	label var coll_ed "Respondent college educated"
	
* Years of school: educ is renamed and its missing codes unified to "."
	rename educ yrsschool
	tab yrsschool, m nol 
	replace yrsschool = . if inlist(yrsschool,.n,.d,.a)
	
	* Binned years: each category is represented by a single number of years
	gen yrsschool_bin = .
	replace yrsschool_bin = 0  if yrsschool==0
	replace yrsschool_bin = 6  if yrsschool>0 & yrsschool<8
	replace yrsschool_bin = 8  if yrsschool==8
	replace yrsschool_bin = 10 if inrange(yrsschool,9,11)
	replace yrsschool_bin = 12 if yrsschool==12
	replace yrsschool_bin = 14 if inrange(yrsschool,13,15)
	replace yrsschool_bin = 16 if yrsschool>=16 & !missing(yrsschool)
	label var yrsschool_bin "Years of school, binned"

* Educational categories for father (from paeduc) and mother (from maeduc).
* The same steps run for each parent, father first, so variables are created
* in the order: edu_dad, dad_hs_ed, dad_coll_ed, edu_dad_bin, then the mom set.
	local parent_tags   dad mom
	local parent_caps   Dad Mom
	local parent_srcs   paeduc maeduc

	forvalues i = 1/2 {
		local p   : word `i' of `parent_tags'
		local P   : word `i' of `parent_caps'
		local src : word `i' of `parent_srcs'

		gen edu_`p' = .
		replace edu_`p' = 0 if `src'==0             //none
		replace edu_`p' = 1 if inrange(`src',1,7)   //some grade school
		replace edu_`p' = 2 if `src'==8             //completed 8th grade
		replace edu_`p' = 3 if inrange(`src',9,11)  //some HS
		replace edu_`p' = 4 if `src'==12            //4 years of HS
		replace edu_`p' = 5 if inrange(`src',13,15) //some college
		replace edu_`p' = 6 if inrange(`src',16,20) //college
		label var edu_`p' "Educational categories for `p'"

		* Note: unlike hs_ed/coll_ed for the respondent, these dummies are 0
		* (not missing) when the parent's education category is missing.
		gen `p'_hs_ed = edu_`p'>=4 & edu_`p'<.
		gen `p'_coll_ed = edu_`p'>=6 & edu_`p'<.
		label var `p'_hs_ed "`P' HS educated"
		label var `p'_coll_ed "`P' College educated"

		* Map categories 0..6 to representative years 0, 6, 8, 10, 12, 14, 16
		gen edu_`p'_bin = .
		local cat = 0
		foreach yrs of numlist 0 6 8 10 12 14 16 {
			replace edu_`p'_bin = `yrs' if edu_`p'==`cat'
			local ++cat
		}
		tab edu_`p'_bin, m
		label var edu_`p'_bin "`P', Years of schooling usin bins"
	}

* Raw parental years of school: rename, then blank out values above 20
* (this also turns extended missing codes into plain ".")
	rename paeduc yrsschool_dad
	rename maeduc yrsschool_mom

	foreach who in dad mom {
		replace yrsschool_`who' = . if yrsschool_`who'>20
		tab yrsschool_`who'
	}

*************
* Siblings *
*************

* # of brothers and sisters--not available.

* # of siblings
/* Note: includes biological, step, and adopted brothers and sisters. 
Also includes siblings born alive but no longer living. */
	replace sibs =. if inlist(sibs,.d,.i,.n)
	ren sibs R_num_siblings
	label var R_num_siblings "# of R's siblings"

*****************
* Own fertility *
*****************

* # (living) boys and girls--only available in 1994 
* For each of up to 9 children (kdsex1-kdsex9), build a boy dummy and a girl
* dummy; both are missing when the child's sex is missing.

	forvalues num = 1/9 {
	
		tab kdsex`num', m 
		replace kdsex`num' =. if inlist(kdsex`num',.i,.n) 
		tab kdsex`num', m 
	
		//dummy =1 if kid is a boy
		gen R_kid`num'_boy = (kdsex`num'==1) if kdsex`num'<.
		tab R_kid`num'_boy, m 
	
		//dummy =1 if kid is a girl
		gen R_kid`num'_girl = (kdsex`num'==2) if kdsex`num'<.
		tab R_kid`num'_girl, m 
	}

	* Row totals are restricted to the dummies built above (R_kid*_boy /
	* R_kid*_girl). A bare *_boy / *_girl wildcard would also sum any other
	* variable in the data that happens to end in that suffix.
	* Option "missing": the total is missing when all components are missing.
	egen R_num_boys_living = rowtotal(R_kid*_boy), missing //total number of living male kids in 1994
	
* Flag indeterminate (high) # of boys--not available

* # (living) girls--only available in 1994
	egen R_num_girls_living = rowtotal(R_kid*_girl), missing //total number of living female kids in 1994

* Flag indeterminate (high) # of girls--not available

* # of living kids--only available in 1994
* Sum of the two counts; if only one count is non-missing, that count alone.
	gen R_numkids_living =.
	replace R_numkids_living = R_num_girls_living + R_num_boys_living if (R_num_girls_living!=. & R_num_boys_living!=.)
	replace R_numkids_living = R_num_girls_living if (R_num_girls_living!=. & R_num_boys_living==.)
	replace R_numkids_living = R_num_boys_living if (R_num_girls_living==. & R_num_boys_living!=.)
	tab R_numkids_living, m
	label var R_numkids_living "# of R's kids who are living"

* # of kids ever
	replace childs =. if childs==.d 
	
	ren childs R_numkids_ever
	label var R_numkids_ever "# of kids that R has ever had (living or deceased)"

* flag: R has an indeterminate (high) # of kids ever
* "<." (rather than "!=.") so that any extended missing code left in
* R_numkids_ever yields a missing flag instead of 0, matching R_kids_ever below.
	gen flag_R8plus_kidsever = (R_numkids_ever==8) if R_numkids_ever<.
	tab R_numkids_ever, m
	tab flag_R8plus_kidsever,m 
	label var flag_R8plus_kidsever "Dummy =1 if R has an indeterminate (8+) # of kids ever"
	
* # of deceased kids--not available.

* Dummy: has R ever had kids
	gen R_kids_ever = R_numkids_ever!=0 if R_numkids_ever<.
	tab R_kids_ever,m 
	label var R_kids_ever "Dummy=1 if R has ever had kids"

*Dummy: Does R have kids right now? 
/*Note: There will be a lot of missings because R_numkids_living is only available in 1994. */
	gen R_kids_now = R_numkids_living!=0 if R_numkids_living<. 
	tab R_kids_now,m 

*********************************
* # of persons in R's household *
*********************************

* # of kids (of any age) living in R's hh
* Builds one flag per household-member slot (1-14) marking R's children, then
* sums the flags across slots. Statement order matters in this section: the
* person* and *_0to17 wildcards pick up whatever variables exist at that point.
	
	replace rplace =. if rplace==.n
	
	foreach num of numlist 1/14 {
	
		tab relate`num', m
		replace relate`num'=. if (relate`num'==.i | relate`num'==.n)
		tab relate`num', m
		
		gen person`num'_kid`num' =.
		
		//Dummy =1 if a given hh member is R's child. Only look at Rs who are hh head or the spouse of hh head.
		//The flag stays missing when relate`num' is missing or rplace is not 1 or 2.
		replace person`num'_kid`num' = (relate`num'==3) if relate`num'<. & (rplace==1 | rplace==2)
		
		tab person`num'_kid`num', m
		tab rplace if relate`num'==3, m 	
		//Sanity check: no child flag equals 1 when rplace>2 (rplace>2 also covers missing rplace).
		assert person`num'_kid`num'!=1 if rplace>2
	}
	
	// NOTE(review): person30 is not created anywhere in this script; presumably it
	// comes from the loaded data -- confirm, since drop errors if it is absent.
	drop person30 //avoid including this variable in egen rowtotal
	d person* //only person`num'_kid`num' variables come up
	
	// At this point person* matches only the child flags built above; the
	// person`num'_lessthan18 variables are created later, so they are not summed here.
	// Option "missing": the total is missing when every flag is missing.
	egen R_totnumkids_livinginhh = rowtotal(person*), missing
	tab R_totnumkids_livinginhh,m 
	assert R_totnumkids_livinginhh ==. if rplace>2 
	label var R_totnumkids_livinginhh "Total # of kids (of any age) living in R's hh"

*************
*************

* # of children 0-17 living in R's hh 
* For each slot: flag members under 18 (from old`num'), then flag members who
* are both under 18 and R's child; the per-slot flags are summed afterwards.
	
	foreach num of numlist 1/14 {
		tab old`num',m 
		replace old`num' =. if (old`num'==.i | old`num'==.d | old`num'==.n)
		
		gen person`num'_lessthan18 = (old`num'<18) if old`num'<.
		tab person`num'_lessthan18,m 
		tab old`num',m 
		
		//Defined only when both the under-18 flag and the child flag are non-missing.
		gen kid`num'_0to17 = (person`num'_lessthan18==1 & person`num'_kid`num'==1) if person`num'_lessthan18!=. & person`num'_kid`num'!=.
		tab kid`num'_0to17, m 
	}
	
	egen R_totnumkids_0to17_livinginhh = rowtotal(*_0to17), missing
	tab R_totnumkids_0to17_livinginhh,m 
	
	// Remove the intermediate per-slot flags, plus every variable ending in
	// _boy or _girl (the per-child sex dummies from the fertility section).
	drop person* *_0to17 *_boy *_girl
	
*************
*************

* Household size, with and without the respondent
	replace hompop = . if hompop==.n

	* hompop = number of persons in household; R appears to be included in it
	rename hompop R_hhsize_plusR
	label var R_hhsize_plusR "total # of persons in R's hh (including R)"

	* Same count excluding R (missing when the household count is missing)
	gen R_hhsize_minusR = R_hhsize_plusR - 1
	label var R_hhsize_minusR "total # of persons in R's hh (NOT including R)"
	tab R_hhsize_minusR, m

*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*
	
*********************
*** BIRTH YEAR  
*********************

	tab dateintv, m 

* Approximate year of birth = survey year - age - 1.
* Subtract 1 year to reflect that most interviews happen in first half of year.
* Computed in one vectorised statement; this is equivalent to looping over
* every survey year and replacing dob within each year.
	gen dob = year - age - 1
	tab dob  

* Assign everyone the decade in which they were born (1920s through 1980s).
* dob values outside 1920-1989 leave decade missing.
	gen decade = .
	forvalues d = 1920(10)1980 {
		replace decade = `d' if inrange(dob, `d', `d' + 9)
	}
	label var decade "Decade of birth"
	
* Generate dummies for each decade (decade_1, decade_2, ... in ascending order of decade)
	tab decade, gen(decade_)


*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

*******************
*** FATHER INCOME 
*******************
	
**** KEY moment **** 
/*	
	We have three measures of father occupation. The 2010-coded measure is the
	main one; where it is missing, it is filled from the 1970-coded measure
	(survey years up to 1990) or the 1980-coded measure (1991-2010).
*/

	lookfor fatheroccej

	tab fatheroccej10, m
	replace fatheroccej10 = fatheroccej_70 if fatheroccej10==. & year<=1990
	replace fatheroccej10 = fatheroccej_80 if fatheroccej10==. & (year>1990 & year<=2010)
	rename fatheroccej10 fatheroccej
	label var fatheroccej "Father coarsened occupation"	

*******************
*** MOTHER INCOME 
*******************

	* Same back-fill for mother, but only from the 1980-coded measure.
	tab motheroccej_10, m
	replace motheroccej_10 = motheroccej_80 if motheroccej_10==. & (year>1990 & year<=2010)
	rename motheroccej_10 motheroccej
	label var motheroccej "Mother coarsened occupation"

** Dummies for head of household when R was growing up (i.e., when R was 16)
** Each dummy is 1 for the listed family16 codes, 0 for any other value that
** is not ".", and missing when family16 is ".".
	replace family16 = . if inlist(family16,.i,.n)
	
	gen headofhh_father = inlist(family16,1,2,3,4) if family16!=.
	tab headofhh_father, m
	
	gen headofhh_mother = (family16==5) if family16!=.
	tab headofhh_mother, m
	
	//R lived with male relatives OR combo of male and female relatives.
	gen headofhh_othermale = inlist(family16,6,8) if family16!=.
	tab headofhh_othermale, m
	
	gen headofhh_otherfemale = (family16==7) if family16!=.
	tab headofhh_otherfemale, m
	
	//Alternate dummy for father as head of hh during R's childhood.  
	/*Note: When father's occupation is reported but R does not report
			who R lived with when growing up (family16 missing), assume
			that R lived with father.*/
	gen headofhh_father_imputed = cond(fatheroccej!=. & family16==., 1, headofhh_father)
	tab headofhh_father_imputed, m
	tab headofhh_father, m
	label var headofhh_father_imputed "Impute dad when parent occ != missing & no info about hh head at age 16"	

	
*****************************************************************************************************
* DUMMIES FOR WHEN WE KNOW WHY DAD OR MOM DIDN'T WORK (I.E. WHY FATHEROCCEJ/MOTHEROCCEJ IS MISSING) *
*****************************************************************************************************

	* Inspect the raw father occupation codes where the coarsened one is missing
	tab paocc10 if year>2010 & fatheroccej==., m
	tab paocc80 if (year>1990 & year<=2010) & fatheroccej==., m
	tab paocc16 if year<=1990 & fatheroccej==., m
	
	* NOTE(review): father_notworking is only ever set to 0 (occupation
	* observed) and is missing otherwise; unlike mother_notworking below it is
	* never set to 1 -- confirm whether a "=1" rule for fathers is intended.
	gen father_notworking = 0 if fatheroccej!=.
	tab father_notworking, m
	
	 
	* Same inspection for mother
	tab maocc80 if (year>1990 & year<=2010) & motheroccej==., m
	tab maocc10 if (year>2010) & motheroccej==., m
	
	* =1 when the coarsened mother occupation is missing, the raw occupation
	*    code for the relevant period is .i, and family16 is not 0, 4, 6 or "."
	* =0 when the coarsened mother occupation is observed
	gen mother_notworking = .
	replace mother_notworking = 1 if motheroccej==. & !inlist(family16,0,4,6,.) & ( (maocc80==.i & year>1990 & year<=2010) | (maocc10==.i & year>2010) )
	replace mother_notworking = 0 if motheroccej!=.
	tab mother_notworking, m 	

	* Variable for father being either farm laborer or operator
	* (coarsened occupation codes 71 and 81); missing when fatheroccej is "."
	gen fatherfarm = inlist(fatheroccej,71,81) if fatheroccej!=.
	label var fatherfarm "Father in ag occupation"
	
*******************
*** RESPONDENT OCCUPATIONAL INCOME 
*******************
	
* Respondent (occR) and spouse (occSP) occupational categories.
* The 2010-coded variable is the main measure; where it is missing it is
* filled from the 1970-coded one (years up to 1990) or the 1980-coded one
* (1991-2010), then renamed to <stub>ej.

foreach stub in occR occSP {
	local main `stub'_10
	tab `main', m
	replace `main' = `stub'_70 if `main'==. & year<=1990
	replace `main' = `stub'_80 if `main'==. & (year>1990 & year<=2010)
	rename `main' `stub'ej
	label var `stub'ej "Coarsened occupation"
	}
	
*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

***************
*** INTERACTION TERMS
*************** 
	
	* fatherforeign is temporarily renamed here and renamed back before saving
	* (presumably to keep the derived *_dm name short -- TODO confirm).
	rename fatherforeign Fforeign
	global covariate_list "female black union_hh unionR veteran married foreignborn Fforeign urban hs_ed coll_ed"

****De-mean variables
* For every covariate in the list, create <name>_dm = value minus the sample
* mean, carrying the original variable label over with ", demeaned" appended.
	foreach cov of global covariate_list {
		summarize `cov'
		gen `cov'_dm = `cov' - `r(mean)'
		local cov_label : variable label `cov'
		label variable `cov'_dm "`cov_label', demeaned"
	}


	
***************
*** SAVE
***************	
	
	*Rename to match other surveys
	rename Fforeign fatherforeign
	rename faminc fam_inc 
	rename south_raised bornsouth
	
	*Unique identifier in GSS: id followed by the 4-digit year
	*(e.g. id 12 in 1994 -> 121994). Because year is always the last four
	*digits, distinct (year, id) pairs give distinct identifiers.
	tostring year id, gen(stryear strid)
	egen id_temp = concat(strid stryear)
	
	destring id_temp, gen(id_gss)

	*Enforce uniqueness rather than only reporting it: isid stops the
	*do-file with an error if id_gss has duplicates or missing values.
	isid id_gss
	duplicates report id_gss //no duplicates
	drop stryear strid
	*Note: the string version id_temp is left in the saved dataset.
	
	*Rename weight variable
	ren wtssal weight_gss
	order id_gss weight_gss
	sort id_gss
	
	tab age
	compress 
	save ./output/GSS_allyears_foranalysis.dta, replace
	
